rm(list = ls(all = TRUE)) # clear the workspace

library(sf)
library(tidyverse)
library(terra)
library(nhdplusTools)
library(mapview)
library(dataRetrieval)
library(lubridate)
library(prism)
library(ggspatial)
library(nngeo)  # added from original code
library(stars)  # added from original code
# Source all helper functions in src/. The original plyr::ldply() call threw
# an ignorable error ("Results must be all atomic, or all data frames")
# because source() does not return data frames; a plain lapply() avoids it.
invisible(lapply(list.files(path = "src/",
                            pattern = "\\.R$",
                            full.names = TRUE),
                 source))
# Rmarkdown options
# note: comment = F is not a valid value; knitr coerces it to the string
# "FALSE" and prefixes every output line with it. Use a string instead.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, comment = "##", message = FALSE)

# mapview options
mapviewOptions(basemaps.color.shuffle = FALSE, basemaps = 'OpenTopoMap')

Setting up your site data set.

For this code to run properly, your site data must be configured as follows (a quick format check is sketched after the list):

  1. Each site is identified by a unique site name; in the data set, this column must be called site.
  2. Each site has its known COMID, in a column named comid.
  3. The site data table is a CSV stored in the data/ folder.
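
A quick way to confirm the format before running anything; the file name below is a placeholder for your own CSV:

# Hypothetical format check; replace data/my_sites.csv with your site table
sites_check <- readr::read_csv("data/my_sites.csv", show_col_types = FALSE)
stopifnot(
  all(c("site", "comid") %in% names(sites_check)), # required columns present
  !any(duplicated(sites_check$site))               # site names are unique
)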

Downloading necessary data sets

Currently, this workflow requires downloading several data sets locally for much faster run times: PRISM climate and aridity rasters, NHD flow-direction data, and CONUS-wide NHD catchments. All data sets are found in the shared data folder.
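
If you still need to stage these yourself, the prism and nhdplusTools packages can fetch two of the three directly (there is no comparable helper for the aridity rasters). The target directories below are assumptions, and note the national seamless NHDPlus download is several GB:

# Download PRISM 30-year normals into an assumed data/prism/ folder
prism::prism_set_dl_dir("data/prism")
prism::get_prism_normals(type = "ppt", resolution = "4km", annual = TRUE)

# Download the national seamless NHDPlusV2 database (large!) into data/nhdplusv2/
nhdplusTools::download_nhdplusv2(outdir = "data/nhdplusv2")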

National Hydrography Dataset (NHD) data extraction

Use the COMID to link sites with all data sets in this workflow.
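
If a COMID is missing or suspect, it can be looked up from a site's coordinates with nhdplusTools; the lon/lat below are placeholders:

# Look up the COMID of the catchment containing a point (placeholder coordinates)
pt <- sf::st_sfc(sf::st_point(c(-119.33, 46.37)), crs = 4326)
nhdplusTools::discover_nhdplus_id(point = pt)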

sf_use_s2(FALSE) # use planar (GEOS) geometry to avoid s2 errors with some NHD polygons
site_type = "comid" # this step assumes you have already checked your COMIDs
sites <- read_csv("v4_RCSFA_Geospatial_Data_Package/v4_RCSFA_Geospatial_Site_Information.csv")

# Drop sites that are too small to have a COMID or that fall outside the US,
# then select and rename columns to the names the rest of the code expects
sites = sites %>% filter(COMID != -9999) %>% dplyr::select(site = 'Site_ID', comid = 'COMID')

# Remove duplicated COMIDs (i.e., samples located in the same catchment)
subset_sites <- sites %>%
    distinct(comid, .keep_all = TRUE)
# rename for code compatibility
all_sites = sites
sites = subset_sites

Pull all metadata associated with each site’s COMID.

if(site_type == "comid"){
  sites <- getNHDcomid(df = dplyr::select(sites, site, comid))
}
## [1] "290 locations linked to the NHD."

Make NHD-based watershed shapefiles for all CONUS sites. To make this step MUCH faster, it is best to have a locally downloaded version of the national NHD catchment shapefile stored on your system. I have already included this shapefile in the data folder.

site_watersheds <- getWatersheds(df = sites, make_pretty = TRUE) %>%
  inner_join(., select(sf::st_drop_geometry(sites), site, comid), by = "comid")
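
getWatersheds() is a helper sourced from src/. If you do not have the local catchment shapefile, a comparable basin can be pulled per site from the Network Linked Data Index instead, albeit much more slowly (one web request per site):

# Sketch: NLDI basin for the first site only
basin <- nhdplusTools::get_nldi_basin(
  nldi_feature = list(featureSource = "comid",
                      featureID = as.character(sites$comid[1])))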

Exporting shapefiles

st_write(site_watersheds, dsn = "shape/", layer = "v4_RCSFA_shp", driver = "ESRI Shapefile")
## Writing layer `v4_RCSFA' to data source `shape/' using driver `ESRI Shapefile'
## Writing 290 features with 2 fields and geometry type Unknown (any).

Map all your sites. Maps are automatically stored in the data/maps/ folder. It is highly recommended to review each map, particularly for known locations along BIG rivers and TINY streams. Note: COMIDs should have been checked before running this code.

suppressWarnings(map2(sites$site, sites$comid, getMaps)) 
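
getMaps() is a project helper sourced from src/. For a rough idea of what a static review map involves, here is a minimal sketch with ggspatial tiles for a single site (tiles are fetched from OpenStreetMap, so this needs an internet connection; it is not the project's getMaps):

# Minimal static review map for one site (sketch only)
ggplot2::ggplot() +
  ggspatial::annotation_map_tile(type = "osm") +
  ggplot2::geom_sf(data = site_watersheds[1, ], fill = NA, linewidth = 1) +
  ggplot2::geom_sf(data = sites[1, ], color = "black")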

Interactive map showing all sites and their delineated watersheds:

mapview(site_watersheds, col.regions = "#56B4E9", alpha.regions = 0.2, lwd = 3, layer.name = "Watershed") +
  mapview(sites, cex = 5, col.regions = "black", layer.name = "Points") + 
  mapview(st_read('data/site_flowlines.gpkg', quiet = T), lwd = 3, color = "red", layer.name = "Flowline")

StreamCat data extractions

This section uses the R package StreamCatTools. Consult its GitHub repository for information regarding the package citation.

# install from GitHub the first time
library(remotes)
#install_github("USEPA/StreamCatTools", build_vignettes=FALSE)
library(StreamCatTools)
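
Once the package is installed, R's built-in helper prints the citation details:

citation("StreamCatTools")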

# Get parameters available and subset the list of variables of interest
params <- sc_get_params(param='name')
# Land cover classes (the first two '2019' matches are not land cover
# classes, so they are dropped)
nlcd2019 = unique(params[grep('2019', params)])
nlcd2019 = nlcd2019[3:length(nlcd2019)]
# Fire related
fire = unique(params[grep('pctburnareamtbs|pctlowsevmtbs|pctmodsevmtbs|pcthighsevmtbs',params)])

# Extract data
# need to add each metric manually
df <- sc_get_data(metric ='bfi,elev,runoff,om,pctbl2019,pctconif2019,pctcrop2019,pctdecid2019,pctgrs2019,pcthay2019,pcthbwet2019,pctice2019,pctimp2019,pctimp2019slp10,pctimp2019slp20,pctmxfst2019,pctow2019,pctshrb2019,pcturbhi2019,pcturblo2019,pcturbmd2019,pcturbop2019,pctwdwet2019',
aoi = 'cat,ws', comid = sites$comid,
showAreaSqKm = TRUE, showPctFull = TRUE)

# Build the long MTBS metric list programmatically (same metrics and order as
# writing them out by hand: four fire metrics x years 1984-2018)
mtbs_metrics <- paste(unlist(lapply(
  c('pctburnareamtbs', 'pcthighsevmtbs', 'pctlowsevmtbs', 'pctmodsevmtbs'),
  function(m) paste0(m, 1984:2018))), collapse = ',')

df2 <- sc_get_data(metric = mtbs_metrics,
                   aoi = 'cat,ws', comid = sites$comid,
                   showAreaSqKm = FALSE, showPctFull = FALSE)

df3 = merge(df,df2, by = 'comid')

# Build the metric string from vectors so no stray newlines or spaces end up
# in the query (the original multi-line string literal embedded them)
misc_metrics <- paste(c(
  'hydrlcond', 'mast2008', 'mast2009', 'mast2013', 'mast2014',
  paste0('pctfire', 2000:2010),
  paste0('pctfrstloss', 2001:2013),
  'precip2008', 'precip2009', 'precip8110', 'precip9120',
  'tmean2008', 'tmean2009', 'tmean8110', 'tmean9120', 'sand', 'clay'
), collapse = ',')

df4 <- sc_get_data(metric = misc_metrics,
                   aoi = 'cat,ws', comid = sites$comid,
                   showAreaSqKm = FALSE, showPctFull = FALSE)

df5 = merge(df3,df4, by = 'comid')
sites = merge(sites,df5, by = 'comid', all = T)
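
An optional sanity check that the merges stayed one row per COMID:

# Each COMID should appear exactly once after merging the StreamCat pulls
stopifnot(!any(duplicated(df5$comid)))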

Pulling additional geospatial data not in StreamCat

Lastly, we pull in additional data that are not found in StreamCat. This includes PRISM climate data, aridity data, and Omernik Ecoregion data for each site’s coordinates (i.e., at the site location, NOT aggregated across its watershed). We also calculate mean aridity and dominant Ecoregion across site watersheds. Our net primary production layer is no longer available from NASA, so it has been excluded from this data pull. I plan to update this workflow to include it again once it becomes available.
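
For orientation, the point-based pulls reduce to extracting raster values at each site's coordinates. A generic sketch with terra, where the .bil path is a placeholder for whichever PRISM normal you have downloaded:

# Generic point extraction (sketch); the raster path is hypothetical
r <- terra::rast("data/prism/PRISM_ppt_30yr_normal_4kmM4_annual_bil.bil")
terra::extract(r, terra::vect(sites))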

# Extract the mean aridity index within each site's watershed as well as each site's location
sites <- getAridity(df = sites, sf = site_watersheds)

# Extract Omernik ecoregion for each site's location
#sites <- getOmernikSite(df = sites) # the EPA link used inside this function no longer works

# Extract dominant Omernik ecoregion within each site's watershed
#sites <- getOmernikWs(df = sites, sf = site_watersheds) # the EPA link used inside this function no longer works
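
A possible workaround until those functions are updated: download the Omernik Level III shapefile from EPA manually and join it locally. The path and the US_L3NAME column are assumptions based on the standard us_eco_l3 distribution:

# Hypothetical local Omernik join (shapefile path and column name assumed)
ecoregions <- sf::st_read("data/us_eco_l3.shp", quiet = TRUE) %>%
  sf::st_transform(sf::st_crs(sites))
# Ecoregion at each site's location:
# sites <- sf::st_join(sites, ecoregions["US_L3NAME"])
# Dominant ecoregion per watershed = largest intersected area:
# dom_eco <- sf::st_intersection(site_watersheds, ecoregions) %>%
#   dplyr::mutate(area = sf::st_area(.)) %>%
#   dplyr::group_by(comid) %>% dplyr::slice_max(area, n = 1)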

# Extract PRISM ppt, tmean, tmax, and tmin data for each site's location
sites <- getPRISM(df = sites)

# Extract mean chemistry values within each site's watershed as well as at each site's location
sites <- getChemistry(df = sites, sf = site_watersheds)

# Link to the original NPP data set:
# https://lpdaac.usgs.gov/products/mod17a3hgfv006/
# "Terra MODIS Net Primary Production Yearly L4 Global 500 m SIN Grid products
# are currently unavailable due to unexpected errors in the input data. Please
# note that a newer version of MODIS land products is available and plans are
# being developed for the retirement of Version 6 MODIS data products. Users
# are advised to transition to the improved Version 6.1 products as soon as
# possible."

Export all the data.

final_df <- all_sites %>%
  full_join(sites, by = "comid") %>%
  mutate(site = coalesce(site.x, site.y)) %>%
  select(site, everything(), -site.x, -site.y)
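
An optional final check that every original site survived the join exactly once:

# all_sites drives the join, so row counts should match and site names stay unique
stopifnot(nrow(final_df) == nrow(all_sites), !any(duplicated(final_df$site)))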

write.csv(final_df,paste0("v4_RCSFA_Extracted_Geospatial_Data_StreamCat",Sys.Date(),".csv"), row.names = F)